
package org.apache.solr.schema;

import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.*;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.KeywordTokenizerFactory;
import org.apache.lucene.analysis.util.*;
import org.apache.lucene.util.Version;
import org.apache.solr.analysis.TokenizerChain;
import org.apache.solr.common.SolrException;
import org.apache.solr.core.Config;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.util.DOMUtil;
import org.apache.solr.util.plugin.AbstractPluginLoader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.*;

/**
 * Loads {@link FieldType} plugins from {@code <fieldType>} nodes in schema.xml.
 * For each field type this loader resolves the index/query/multiterm analyzers
 * (from nested {@code <analyzer>} elements) and an optional custom
 * {@code <similarity>} factory, then registers the type in the supplied map.
 */
public final class FieldTypePluginLoader
        extends AbstractPluginLoader<FieldType> {

    private static final String LUCENE_MATCH_VERSION_PARAM = IndexSchema.LUCENE_MATCH_VERSION_PARAM;
    protected static final Logger log = LoggerFactory.getLogger(FieldTypePluginLoader.class);
    // NOTE(review): javax.xml.xpath.XPath instances are not thread-safe; this
    // assumes the loader is used from a single schema-parsing thread — confirm.
    private final XPath xpath = XPathFactory.newInstance().newXPath();

    private final IndexSchema schema;
    private final Map<String, FieldType> fieldTypes;
    private final Collection<SchemaAware> schemaAware;

    /**
     * @param schema The schema that will be used to initialize the FieldTypes
     * @param fieldTypes All FieldTypes that are instantiated by this Plugin
     * Loader will be added to this Map
     * @param schemaAware Any SchemaAware objects that are instantiated by this
     * Plugin Loader will be added to this collection.
     */
    public FieldTypePluginLoader(final IndexSchema schema, final Map<String, FieldType> fieldTypes, final Collection<SchemaAware> schemaAware) {

        super("[schema.xml] fieldType", FieldType.class, true, true);

        this.schema = schema;
        this.fieldTypes = fieldTypes;
        this.schemaAware = schemaAware;
    }

    /**
     * Instantiates a {@link FieldType} and wires up its analyzers and
     * (optionally) a custom similarity from the {@code <fieldType>} node.
     *
     * <p>Analyzer resolution: an {@code <analyzer>} with no {@code type}
     * attribute (or {@code type="index"}) supplies the index analyzer; missing
     * index/query analyzers fall back to each other; a missing multiterm
     * analyzer is derived from the query analyzer via
     * {@link #constructMultiTermAnalyzer(Analyzer)}.
     */
    @Override
    protected FieldType create(SolrResourceLoader loader, String name, String className, Node node) throws Exception {

        FieldType ft = loader.newInstance(className, FieldType.class);
        ft.setTypeName(name);

        String expression = "./analyzer[@type='query']";
        Node anode = (Node) xpath.evaluate(expression, node, XPathConstants.NODE);
        Analyzer queryAnalyzer = readAnalyzer(anode);

        expression = "./analyzer[@type='multiterm']";
        anode = (Node) xpath.evaluate(expression, node, XPathConstants.NODE);
        Analyzer multiAnalyzer = readAnalyzer(anode);

        // An analyzer without a type specified, or with type="index"
        expression = "./analyzer[not(@type)] | ./analyzer[@type='index']";
        anode = (Node) xpath.evaluate(expression, node, XPathConstants.NODE);
        Analyzer analyzer = readAnalyzer(anode);

        // a custom similarity[Factory]
        expression = "./similarity";
        anode = (Node) xpath.evaluate(expression, node, XPathConstants.NODE);
        SimilarityFactory simFactory = IndexSchema.readSimilarity(loader, anode);

        // If only one of index/query analyzers is configured, use it for both.
        if (queryAnalyzer == null) {
            queryAnalyzer = analyzer;
        }
        if (analyzer == null) {
            analyzer = queryAnalyzer;
        }
        if (multiAnalyzer == null) {
            multiAnalyzer = constructMultiTermAnalyzer(queryAnalyzer);
        }
        // analyzer may still be null when the fieldType declared no analyzers
        // at all (e.g. non-text types); in that case leave the type's defaults.
        if (analyzer != null) {
            ft.setAnalyzer(analyzer);
            ft.setQueryAnalyzer(queryAnalyzer);
            if (ft instanceof TextField) {
                ((TextField) ft).setMultiTermAnalyzer(multiAnalyzer);
            }
        }
        if (simFactory != null) {
            ft.setSimilarity(simFactory.getSimilarity());
        }
        if (ft instanceof SchemaAware) {
            schemaAware.add((SchemaAware) ft);
        }
        return ft;
    }

    /** Passes all node attributes except {@code name}/{@code class} to the type's init args. */
    @Override
    protected void init(FieldType plugin, Node node) throws Exception {

        Map<String, String> params = DOMUtil.toMapExcept(node.getAttributes(), "name", "class");
        plugin.setArgs(schema, params);
    }

    /** Registers the type by name; returns any previously registered type it displaced. */
    @Override
    protected FieldType register(String name, FieldType plugin) throws Exception {

        log.trace("fieldtype defined: {}", plugin);
        return fieldTypes.put(name, plugin);
    }

    // The point here is that, if no multiterm analyzer was specified in the schema file, do one of several things:
    // 1> If legacyMultiTerm == false, assemble a new analyzer composed of all of the charfilters,
    //    lowercase filters and asciifoldingfilter.
    // 2> If legacyMultiTerm == true just construct the analyzer from a KeywordTokenizer. That should mimic current behavior.
    //    Do the same if they've specified that the old behavior is required (legacyMultiTerm="true")
    private Analyzer constructMultiTermAnalyzer(Analyzer queryAnalyzer) {

        if (queryAnalyzer == null) {
            return null;
        }

        // A non-TokenizerChain analyzer (explicit class= analyzer) can't be
        // introspected, so fall back to a verbatim KeywordAnalyzer.
        if (!(queryAnalyzer instanceof TokenizerChain)) {
            return new KeywordAnalyzer();
        }

        // Rebuild the chain keeping only the MultiTermAware pieces of each stage.
        TokenizerChain tc = (TokenizerChain) queryAnalyzer;
        MultiTermChainBuilder builder = new MultiTermChainBuilder();

        CharFilterFactory[] charFactories = tc.getCharFilterFactories();
        if (charFactories != null) {
            for (CharFilterFactory fact : charFactories) {
                builder.add(fact);
            }
        }

        builder.add(tc.getTokenizerFactory());

        for (TokenFilterFactory fact : tc.getTokenFilterFactories()) {
            builder.add(fact);
        }

        return builder.build();
    }

    /**
     * Accumulates the multiterm-aware components of an analyzer chain.
     * Components that do not implement {@link MultiTermAwareComponent} are
     * silently dropped; the tokenizer defaults to a KeywordTokenizer unless a
     * multiterm-aware tokenizer replaces it.
     */
    private static class MultiTermChainBuilder {

        // Shared default tokenizer: produces the whole input as a single token.
        static final KeywordTokenizerFactory keyFactory;

        static {
            keyFactory = new KeywordTokenizerFactory();
            keyFactory.init(new HashMap<String, String>());
        }
        // char filters are lazily allocated: null means "none seen yet".
        ArrayList<CharFilterFactory> charFilters = null;
        ArrayList<TokenFilterFactory> filters = new ArrayList<>(2);
        TokenizerFactory tokenizer = keyFactory;

        /**
         * Adds the multiterm variant of {@code current} to the chain under
         * construction; no-op when the component is not multiterm-aware.
         */
        public void add(Object current) {
            if (!(current instanceof MultiTermAwareComponent)) {
                return;
            }
            AbstractAnalysisFactory newComponent = ((MultiTermAwareComponent) current).getMultiTermComponent();
            if (newComponent instanceof TokenFilterFactory) {
                // 'filters' is eagerly initialized at declaration, so no null
                // check is needed here (unlike charFilters).
                filters.add((TokenFilterFactory) newComponent);
            }
            else if (newComponent instanceof TokenizerFactory) {
                tokenizer = (TokenizerFactory) newComponent;
            }
            else if (newComponent instanceof CharFilterFactory) {
                if (charFilters == null) {
                    charFilters = new ArrayList<>(1);
                }
                charFilters.add((CharFilterFactory) newComponent);

            }
            else {
                throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown analysis component from MultiTermAwareComponent: " + newComponent);
            }
        }

        /** Builds the assembled chain; charFilters may be null (meaning none). */
        public TokenizerChain build() {

            CharFilterFactory[] charFilterArr = charFilters == null ? null : charFilters.toArray(new CharFilterFactory[charFilters.size()]);
            return new TokenizerChain(charFilterArr, tokenizer, filters.toArray(new TokenFilterFactory[filters.size()]));
        }
    }

    //
    // <analyzer><tokenizer class="...."/><tokenizer class="...." arg="....">
    //
    /**
     * Reads an {@code <analyzer>} node into an {@link Analyzer}.
     *
     * <p>Either an explicit {@code class=} attribute names an Analyzer class
     * (instantiated via a {@code (Version)} constructor when available, else
     * the default constructor), or nested {@code <charFilter>},
     * {@code <tokenizer>} and {@code <filter>} factories are assembled into a
     * {@link TokenizerChain}. Mixing both forms is a configuration error.
     *
     * @param node the analyzer node, or null
     * @return the analyzer, or null when {@code node} is null
     * @throws XPathExpressionException on malformed internal XPath (should not happen)
     * @throws SolrException on any configuration or instantiation error
     */
    private Analyzer readAnalyzer(Node node) throws XPathExpressionException {

        final SolrResourceLoader loader = schema.getResourceLoader();

        if (node == null) {
            return null;
        }
        NamedNodeMap attrs = node.getAttributes();
        String analyzerName = DOMUtil.getAttr(attrs, "class");

        // check for all of these up front, so we can error if used in
        // conjunction with an explicit analyzer class.
        NodeList charFilterNodes = (NodeList) xpath.evaluate("./charFilter", node, XPathConstants.NODESET);
        NodeList tokenizerNodes = (NodeList) xpath.evaluate("./tokenizer", node, XPathConstants.NODESET);
        NodeList tokenFilterNodes = (NodeList) xpath.evaluate("./filter", node, XPathConstants.NODESET);

        if (analyzerName != null) {

            // explicitly check for child analysis factories instead of
            // just any child nodes, because the user might have their
            // own custom nodes (ie: <description> or something like that)
            if (0 != charFilterNodes.getLength()
                    || 0 != tokenizerNodes.getLength()
                    || 0 != tokenFilterNodes.getLength()) {
                throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                        "Configuration Error: Analyzer class='" + analyzerName + "' can not be combined with nested analysis factories");
            }

            try {
                // No need to be core-aware as Analyzers are not in the core-aware list
                final Class<? extends Analyzer> clazz = loader.findClass(analyzerName, Analyzer.class);

                try {
                    // first try to use a ctor with version parameter
                    // (needed for many new Analyzers that have no default one anymore)
                    Constructor<? extends Analyzer> cnstr = clazz.getConstructor(Version.class);
                    final String matchVersionStr = DOMUtil.getAttr(attrs, LUCENE_MATCH_VERSION_PARAM);
                    final Version luceneMatchVersion = (matchVersionStr == null)
                            ? schema.getDefaultLuceneMatchVersion()
                            : Config.parseLuceneVersionString(matchVersionStr);
                    if (luceneMatchVersion == null) {
                        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                                "Configuration Error: Analyzer '" + clazz.getName() + "' needs a 'luceneMatchVersion' parameter");
                    }
                    return cnstr.newInstance(luceneMatchVersion);
                }
                catch (NoSuchMethodException nsme) {
                    // otherwise use default ctor
                    return clazz.newInstance();
                }
            }
            catch (SecurityException | SolrException | InstantiationException | IllegalAccessException | IllegalArgumentException | InvocationTargetException e) {
                log.error("Cannot load analyzer: {}", analyzerName, e);
                throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Cannot load analyzer: " + analyzerName, e);
            }
        }

        // Load the CharFilters

        final ArrayList<CharFilterFactory> charFilters = new ArrayList<>();
        AbstractPluginLoader<CharFilterFactory> charFilterLoader =
                new AbstractPluginLoader<CharFilterFactory>("[schema.xml] analyzer/charFilter", CharFilterFactory.class, false, false) {
                    @Override
                    protected void init(CharFilterFactory plugin, Node node) throws Exception {
                        if (plugin != null) {
                            final Map<String, String> params = DOMUtil.toMapExcept(node.getAttributes(), "class");

                            // luceneMatchVersion is consumed here, not passed to the factory.
                            String configuredVersion = params.remove(LUCENE_MATCH_VERSION_PARAM);
                            plugin.setLuceneMatchVersion(parseConfiguredVersion(configuredVersion, plugin.getClass().getSimpleName()));

                            plugin.init(params);
                            charFilters.add(plugin);
                        }
                    }

                    @Override
                    protected CharFilterFactory register(String name,
                            CharFilterFactory plugin) {
                        return null; // used for map registration
                    }
                };

        charFilterLoader.load(loader, charFilterNodes);

        // Load the Tokenizer
        // Although an analyzer only allows a single Tokenizer, we load a list to make sure
        // the configuration is ok

        final ArrayList<TokenizerFactory> tokenizers = new ArrayList<>(1);
        AbstractPluginLoader<TokenizerFactory> tokenizerLoader =
                new AbstractPluginLoader<TokenizerFactory>("[schema.xml] analyzer/tokenizer", TokenizerFactory.class, false, false) {
                    @Override
                    protected void init(TokenizerFactory plugin, Node node) throws Exception {
                        if (!tokenizers.isEmpty()) {
                            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "The schema defines multiple tokenizers for: " + node);
                        }
                        final Map<String, String> params = DOMUtil.toMapExcept(node.getAttributes(), "class");

                        String configuredVersion = params.remove(LUCENE_MATCH_VERSION_PARAM);
                        plugin.setLuceneMatchVersion(parseConfiguredVersion(configuredVersion, plugin.getClass().getSimpleName()));

                        plugin.init(params);
                        tokenizers.add(plugin);
                    }

                    @Override
                    protected TokenizerFactory register(String name, TokenizerFactory plugin) {
                        return null; // used for map registration
                    }
                };

        tokenizerLoader.load(loader, tokenizerNodes);

        // Make sure something was loaded
        if (tokenizers.isEmpty()) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "analyzer without class or tokenizer");
        }

        // Load the Filters

        final ArrayList<TokenFilterFactory> filters = new ArrayList<>();

        AbstractPluginLoader<TokenFilterFactory> filterLoader =
                new AbstractPluginLoader<TokenFilterFactory>("[schema.xml] analyzer/filter", TokenFilterFactory.class, false, false) {
                    @Override
                    protected void init(TokenFilterFactory plugin, Node node) throws Exception {
                        if (plugin != null) {
                            final Map<String, String> params = DOMUtil.toMapExcept(node.getAttributes(), "class");

                            String configuredVersion = params.remove(LUCENE_MATCH_VERSION_PARAM);
                            plugin.setLuceneMatchVersion(parseConfiguredVersion(configuredVersion, plugin.getClass().getSimpleName()));

                            plugin.init(params);
                            filters.add(plugin);
                        }
                    }

                    @Override
                    protected TokenFilterFactory register(String name, TokenFilterFactory plugin) throws Exception {
                        return null; // used for map registration
                    }
                };
        filterLoader.load(loader, tokenFilterNodes);

        return new TokenizerChain(charFilters.toArray(new CharFilterFactory[charFilters.size()]),
                tokenizers.get(0), filters.toArray(new TokenFilterFactory[filters.size()]));
    }

    /**
     * Resolves the effective lucene match version for an analysis factory:
     * the explicitly configured version string if present, else the schema
     * default. Warns when a deprecated pre-4.0 emulation version is in use.
     */
    private Version parseConfiguredVersion(String configuredVersion, String pluginClassName) {
        Version version = (configuredVersion != null)
                ? Config.parseLuceneVersionString(configuredVersion) : schema.getDefaultLuceneMatchVersion();

        if (!version.onOrAfter(Version.LUCENE_40)) {
            log.warn("{} is using deprecated {} emulation. You should at some point declare and reindex to at least 4.0, because "
                    + "3.x emulation is deprecated and will be removed in 5.0", pluginClassName, version);
        }
        return version;
    }
}
